//: C17:ReprocessHTML.cpp
// From Thinking in C++, 2nd Edition
// Available at http://www.BruceEckel.com
// (c) Bruce Eckel 1999
// Copyright notice in Copyright.txt
// Take Word's html output and fix up
// the code listings and html tags
#include "../require.h"
#include <iostream>
#include <fstream>
#include <string>
using namespace std;

// Produce a new string which is the original
// string with the html paragraph break marks
// stripped off:
string stripPBreaks(string s)
{
    int br;
    while ((br = s.find("<P>")) != string::npos)
        s.erase(br, strlen("<P>"));
    while ((br = s.find("</P>")) != string::npos)
        s.erase(br, strlen("</P>"));
    return s;
}

// After the beginning of a code listing is
// detected, this function cleans up the listing
// until the end marker is found. The first line
// of the listing is passed in by the caller,
// which detects the start marker in the line.
void fixupCodeListing(istream& in,
                      ostream& out, string& line, int tag)
{
    out << line.substr(0, tag)
    << "<PRE>" // Means "preformatted" in html
    << stripPBreaks(line.substr(tag)) << endl;
    string s;
    while (getline(in, s)) {
        int endtag = s.find("/""/""/"":~");
        if (endtag != string::npos) {
            endtag += strlen("/""/""/"":~");
            string before = s.substr(0, endtag);
            string after = s.substr(endtag);
            out << stripPBreaks(before) << "</PRE>"
            << after << endl;
            return;
        }
        out << stripPBreaks(s) << endl;
    }
}

string removals[] = {
    "<FONT SIZE=2>",
    "<FONT SIZE=1>",
    "<FONT FACE=\"Times\" SIZE=1>",
    "<FONT FACE=\"Times\" SIZE=2>",
    "<FONT FACE=\"Courier\" SIZE=1>",
    "SIZE=1", // Eliminate all other '1' & '2' size
    "SIZE=2",
};
const int rmsz =
    sizeof(removals)/sizeof(*removals);

int main(int argc, char* argv[])
{
    requireArgs(argc, 2);
    ifstream in(argv[1]);
    assure(in, argv[1]);
    ofstream out(argv[2]);
    string line;
    while (getline(in, line)) {
        // The "Body" tag only appears once:
        if (line.find("<BODY") != string::npos) {
            out << "<BODY BGCOLOR=\"#FFFFFF\" "
            "TEXT=\"#000000\">" << endl;
            continue; // Get next line
        }
        // Eliminate each of the removals strings:
        for (int i = 0; i < rmsz; i++) {
            int find = line.find(removals[i]);
            if (find != string::npos)
                line.erase(find, removals[i].size());
        }
        int tag1 = line.find("/""/"":");
        int tag2 = line.find("/""*"":");
        if (tag1 != string::npos)
            fixupCodeListing(in, out, line, tag1);
        else if (tag2 != string::npos)
            fixupCodeListing(in, out, line, tag2);
        else
            out << line << endl;
    }
} ///:~
